suppressWarnings(suppressMessages({
  library(ggplot2)
  library(data.table)
  library(dplyr)
  library(reshape2)
  library(rprojroot)
  library(patchwork)
}
))

F = is_rstudio_project$make_fix_file()

Introduction

Here, we generate PCA plots for genotypes simulated under different demographic histories (recent and perpetual) and using different sets of variants (rare and common).

Implementation

Read eigenvectors


com_9<-fread(
  F("data/gwas/grid/genotypes/tau-9/ss500/train/genotypes/genos_grid_d36_m0.07_s500_t9_chr1_20.rmdup.train.snps.cm.200k.pca.eigenvec"),
  header = TRUE)

rare_9<-fread(
  F("data/gwas/grid/genotypes/tau-9/ss500/train/genotypes/genos_grid_d36_m0.07_s500_t9_chr1_20.rmdup.train.snps.re.1M.pca.eigenvec"),
  header = TRUE)

com100<-fread(
  F("data/gwas/grid/genotypes/tau100/ss500/train/genotypes/genos_gridt100_l1e7_ss750_m0.05_chr1_20.rmdup.train.cm.200k.eigenvec"),
  header = TRUE)

rare100<-fread(
  F("data/gwas/grid/genotypes/tau100/ss500/train/genotypes/genos_gridt100_l1e7_ss750_m0.05_chr1_20.rmdup.train.re.all.eigenvec"),
  header = TRUE)


colnames(com_9) <- colnames(rare_9) <- colnames(com100) <- colnames(rare100) <- c("FID","IID", paste("PC", seq(1,100), sep = ""))

Read ‘pop’ files, which describe which deme each individual was sampled from.


pop9 = fread(F("data/gwas/grid/genotypes/tau-9/ss500/genos_grid_d36_m0.07_s500_t9.train.pop"))

pop100 = fread(F("data/gwas/grid/genotypes/tau100/ss500/iid_train.txt"))

colnames(pop9) = colnames(pop100) = c("FID","IID","deme","longitude","latitude")

Define bivariate color scheme for points


#define bivariate color scheme for pca
d<-expand.grid(lon=1:6,lat=1:6)
d$deme<-seq(1:36)-1


r1 <- with(d,(lon-min(lon))/(max(lon)-min(lon)))
r2 <- with(d,(lat-min(lat))/(max(lat)-min(lat)))
r3 <- 1-sqrt((r1^2+r2^2)/2)
cc <- rbind(r1, r2, r3, 0.5)
rownames(cc) <- c("red", "green", "blue", "alpha")
cols <- rgb(t(cc))
d$cols=cols

pop9 = merge(pop9,d,by="deme")
pop100 = merge(pop100,d,by="deme")

com_9<-merge(com_9,pop9,by=c("FID","IID"))
com100<-merge(com100,pop100,by=c("FID","IID"))
rare_9<-merge(rare_9,pop9,by=c("FID","IID"))
rare100<-merge(rare100,pop100,by=c("FID","IID"))

Plot the first two PCs calculated from common and rare variants simulated under the recent and perpetual structure model.

plt_common_9<-ggplot(com_9,aes(PC1,PC2,color=as.factor(deme)))+
  geom_point(alpha=0.5, size=0.1)+
  theme_bw()+
  scale_color_manual(values=cols)+
  theme(legend.position="none",
        axis.text=element_text(size=10),
        panel.grid = element_blank(),
        plot.margin = unit(rep(0.5,4), "pt"),
        plot.background = element_blank())+
  labs(title = "Perpetual structure \n (MAF > 5%)")

plt_rare_9<-ggplot(rare_9,aes(PC1,PC2,color=as.factor(deme)))+
  geom_point(alpha=0.5, size=0.1)+
  theme_bw()+
  scale_color_manual(values=cols)+
  theme(legend.position="none",
        axis.text = element_text(size=10),
        panel.grid=element_blank(),
        plot.margin = unit(rep(0.5,4), "pt"))+
  labs(title = "Perpetual structure \n (MAC = 2,3,4)")

plt_common100<-ggplot(com100,aes(PC1,PC2,color=as.factor(deme)))+
  geom_point(alpha=0.5, size=0.1)+
  theme_bw()+
  scale_color_manual(values=cols)+
  theme(legend.position="none",
        axis.text=element_text(size=10),
        panel.grid=element_blank(),
        plot.margin = unit(rep(0.5,4), "pt"),
        plot.background=element_blank())+
  labs(title = "Recent structure \n (MAF > 5%)")


plt_rare100<-ggplot(rare100,aes(PC1,PC2,color=as.factor(deme)))+
  geom_point(alpha=0.5, size=0.1)+
  theme_bw()+
  scale_color_manual(values=cols)+
  theme(legend.position="none",
        axis.text=element_text(size=10),
        panel.grid=element_blank(),
        plot.margin = unit(rep(0.5,4), "pt"),
        plot.background = element_blank())+
  labs(title = "Recent structure \n (MAC = 2,3,4)")

plt_combined <- (plt_common_9 + plt_common100) / (plt_rare_9 + plt_rare100)

plt_combined

Now let’s plot the first two PCs calculated using IBD sharing.


ibd = fread(
  F("data/gwas/grid/genotypes/tau100/ss250/germline/germ.cm_ibd.eigenvec"))

pop100$iid = paste("msp_",seq(1,9000,1),sep="")

colnames(ibd) = c("FID","IID",
                  paste("PC",seq(1,20,1),sep=""))

# ibd2 = ibd[,c("msp","iid") := tstrsplit(IID, split = "_")]
# ibd2[,iid:=paste("tsk",as.numeric(iid)-1,sep="_")]

ibd2 = merge(pop100,
           ibd,
           by.x = "iid",
           by.y = "IID",all.x=TRUE,sort=FALSE)

plt_ibd1 = ggplot(ibd2,aes(PC1,PC2,color=as.factor(deme)))+
  geom_point(show.legend = FALSE,
             alpha=0.5, size=0.1)+
  scale_color_manual(values=cols)+
  theme_bw()+
  theme(legend.position="none",
        axis.text=element_text(size=10),
        plot.title = element_text(size = 11),
        axis.title = element_text(size = 11),
        panel.grid=element_blank())+
  labs(x="PC2",y="PC3",
       title="PCA on IBD-sharing")

plt_ibd2 = ggplot(ibd2,aes(PC3,PC4,color=as.factor(deme)))+
  geom_point(show.legend = FALSE,
             alpha=0.5, size=0.1)+
  scale_color_manual(values=cols)+
  theme_bw()+
  theme(legend.position="none",
        axis.text=element_text(size=10),
        plot.title = element_text(size = 11),
        axis.title = element_text(size = 11),
        panel.grid=element_blank())+
  labs(x="PC2",y="PC3",
       title="PCA on IBD-sharing")

plt_ibd1 + plt_ibd2

LS0tCnRpdGxlOiAiUGxvdHRpbmcgUENBIHVuZGVyIGRpZmZlcmVudCBkZW1vZ3JhcGhpYyBoaXN0b3JpZXMiCm91dHB1dDoKICBodG1sX25vdGVib29rOiBkZWZhdWx0CmVkaXRvcl9vcHRpb25zOgogIGNodW5rX291dHB1dF90eXBlOiBjb25zb2xlCi0tLQoKYGBge3Igc2V0dXB9CgpzdXBwcmVzc1dhcm5pbmdzKHN1cHByZXNzTWVzc2FnZXMoewogIGxpYnJhcnkoZ2dwbG90MikKICBsaWJyYXJ5KGRhdGEudGFibGUpCiAgbGlicmFyeShkcGx5cikKICBsaWJyYXJ5KHJlc2hhcGUyKQogIGxpYnJhcnkocnByb2pyb290KQogIGxpYnJhcnkocGF0Y2h3b3JrKQp9CikpCgpGID0gaXNfcnN0dWRpb19wcm9qZWN0JG1ha2VfZml4X2ZpbGUoKQpgYGAKCiMjIyBJbnRyb2R1Y3Rpb24KCkhlcmUsIHdlIGdlbmVyYXRlIFBDQSBwbG90cyBmb3IgZ2Vub3R5cGVzIHNpbXVsYXRlZCB1bmRlciBkaWZmZXJlbnQgZGVtb2dyYXBoaWMgaGlzdG9yaWVzIChyZWNlbnQgYW5kIHBlcnBldHVhbCkgYW5kIHVzaW5nIGRpZmZlcmVudCBzZXRzIG9mIHZhcmlhbnRzIChyYXJlIGFuZCBjb21tb24pLgoKIyMjIEltcGxlbWVudGF0aW9uCgpSZWFkIGVpZ2VudmVjdG9ycwoKYGBge3J9Cgpjb21fOTwtZnJlYWQoCiAgRigiZGF0YS9nd2FzL2dyaWQvZ2Vub3R5cGVzL3RhdS05L3NzNTAwL3RyYWluL2dlbm90eXBlcy9nZW5vc19ncmlkX2QzNl9tMC4wN19zNTAwX3Q5X2NocjFfMjAucm1kdXAudHJhaW4uc25wcy5jbS4yMDBrLnBjYS5laWdlbnZlYyIpLAogIGhlYWRlciA9IFRSVUUpCgpyYXJlXzk8LWZyZWFkKAogIEYoImRhdGEvZ3dhcy9ncmlkL2dlbm90eXBlcy90YXUtOS9zczUwMC90cmFpbi9nZW5vdHlwZXMvZ2Vub3NfZ3JpZF9kMzZfbTAuMDdfczUwMF90OV9jaHIxXzIwLnJtZHVwLnRyYWluLnNucHMucmUuMU0ucGNhLmVpZ2VudmVjIiksCiAgaGVhZGVyID0gVFJVRSkKCmNvbTEwMDwtZnJlYWQoCiAgRigiZGF0YS9nd2FzL2dyaWQvZ2Vub3R5cGVzL3RhdTEwMC9zczUwMC90cmFpbi9nZW5vdHlwZXMvZ2Vub3NfZ3JpZHQxMDBfbDFlN19zczc1MF9tMC4wNV9jaHIxXzIwLnJtZHVwLnRyYWluLmNtLjIwMGsuZWlnZW52ZWMiKSwKICBoZWFkZXIgPSBUUlVFKQoKcmFyZTEwMDwtZnJlYWQoCiAgRigiZGF0YS9nd2FzL2dyaWQvZ2Vub3R5cGVzL3RhdTEwMC9zczUwMC90cmFpbi9nZW5vdHlwZXMvZ2Vub3NfZ3JpZHQxMDBfbDFlN19zczc1MF9tMC4wNV9jaHIxXzIwLnJtZHVwLnRyYWluLnJlLmFsbC5laWdlbnZlYyIpLAogIGhlYWRlciA9IFRSVUUpCgoKY29sbmFtZXMoY29tXzkpIDwtIGNvbG5hbWVzKHJhcmVfOSkgPC0gY29sbmFtZXMoY29tMTAwKSA8LSBjb2xuYW1lcyhyYXJlMTAwKSA8LSBjKCJGSUQiLCJJSUQiLCBwYXN0ZSgiUEMiLCBzZXEoMSwxMDApLCBzZXAgPSAiIikpCgoKYGBgCgpSZWFkICdwb3AnIGZpbGVzLCB3aGljaCBkZXNjcmliZSB3aGljaCBkZW1lIGVhY2ggaW5kaXZpZHVhbCB3YXMgc2FtcGxlZCBmcm9tLgoKYGBge3J9Cgpwb3A5ID0gZnJlYWQoRigiZGF0YS9nd2FzL2dyaWQvZ2Vub3R5cGVzL3RhdS05L3NzNTAwL2dlbm9zX2dyaWRfZDM2X20wLjA3X3M1MDBfdDkudHJhaW4ucG9wIikpCgpwb3AxMDAgPSBmcmVhZChGKCJkYXRhL2d3YXMvZ3JpZC9nZW5vdHlwZXMvdGF1MTAwL3NzNTAwL2lpZF90cmFpbi50eHQiKSkKCmNvbG5hbWVzKHBvcDkpID0gY29sbmFtZXMocG9wMTAwKSA9IGMoIkZJRCIsIklJRCIsImRlbWUiLCJsb25naXR1ZGUiLCJsYXRpdHVkZSIpCgpgYGAKCkRlZmluZSBiaXZhcmlhdGUgY29sb3Igc2NoZW1lIGZvciBwb2ludHMKCmBgYHtyfQoKI2RlZmluZSBiaXZhcmlhdGUgY29sb3Igc2NoZW1lIGZvciBwY2EKZDwtZXhwYW5kLmdyaWQobG9uPTE6NixsYXQ9MTo2KQpkJGRlbWU8LXNlcSgxOjM2KS0xCgoKcjEgPC0gd2l0aChkLChsb24tbWluKGxvbikpLyhtYXgobG9uKS1taW4obG9uKSkpCnIyIDwtIHdpdGgoZCwobGF0LW1pbihsYXQpKS8obWF4KGxhdCktbWluKGxhdCkpKQpyMyA8LSAxLXNxcnQoKHIxXjIrcjJeMikvMikKY2MgPC0gcmJpbmQocjEsIHIyLCByMywgMC41KQpyb3duYW1lcyhjYykgPC0gYygicmVkIiwgImdyZWVuIiwgImJsdWUiLCAiYWxwaGEiKQpjb2xzIDwtIHJnYih0KGNjKSkKZCRjb2xzPWNvbHMKCnBvcDkgPSBtZXJnZShwb3A5LGQsYnk9ImRlbWUiKQpwb3AxMDAgPSBtZXJnZShwb3AxMDAsZCxieT0iZGVtZSIpCgpjb21fOTwtbWVyZ2UoY29tXzkscG9wOSxieT1jKCJGSUQiLCJJSUQiKSkKY29tMTAwPC1tZXJnZShjb20xMDAscG9wMTAwLGJ5PWMoIkZJRCIsIklJRCIpKQpyYXJlXzk8LW1lcmdlKHJhcmVfOSxwb3A5LGJ5PWMoIkZJRCIsIklJRCIpKQpyYXJlMTAwPC1tZXJnZShyYXJlMTAwLHBvcDEwMCxieT1jKCJGSUQiLCJJSUQiKSkKCmBgYAoKUGxvdCB0aGUgZmlyc3QgdHdvIFBDcyBjYWxjdWxhdGVkIGZyb20gY29tbW9uIGFuZCByYXJlIHZhcmlhbnRzIHNpbXVsYXRlZCB1bmRlciB0aGUgcmVjZW50IGFuZCBwZXJwZXR1YWwgc3RydWN0dXJlIG1vZGVsLiAKCmBgYHtyfQpwbHRfY29tbW9uXzk8LWdncGxvdChjb21fOSxhZXMoUEMxLFBDMixjb2xvcj1hcy5mYWN0b3IoZGVtZSkpKSsKICBnZW9tX3BvaW50KGFscGhhPTAuNSwgc2l6ZT0wLjEpKwogIHRoZW1lX2J3KCkrCiAgc2NhbGVfY29sb3JfbWFudWFsKHZhbHVlcz1jb2xzKSsKICB0aGVtZShsZWdlbmQucG9zaXRpb249Im5vbmUiLAogICAgICAgIGF4aXMudGV4dD1lbGVtZW50X3RleHQoc2l6ZT0xMCksCiAgICAgICAgcGFuZWwuZ3JpZCA9IGVsZW1lbnRfYmxhbmsoKSwKICAgICAgICBwbG90Lm1hcmdpbiA9IHVuaXQocmVwKDAuNSw0KSwgInB0IiksCiAgICAgICAgcGxvdC5iYWNrZ3JvdW5kID0gZWxlbWVudF9ibGFuaygpKSsKICBsYWJzKHRpdGxlID0gIlBlcnBldHVhbCBzdHJ1Y3R1cmUgXG4gKE1BRiA+IDUlKSIpCgpwbHRfcmFyZV85PC1nZ3Bsb3QocmFyZV85LGFlcyhQQzEsUEMyLGNvbG9yPWFzLmZhY3RvcihkZW1lKSkpKwogIGdlb21fcG9pbnQoYWxwaGE9MC41LCBzaXplPTAuMSkrCiAgdGhlbWVfYncoKSsKICBzY2FsZV9jb2xvcl9tYW51YWwodmFsdWVzPWNvbHMpKwogIHRoZW1lKGxlZ2VuZC5wb3NpdGlvbj0ibm9uZSIsCiAgICAgICAgYXhpcy50ZXh0ID0gZWxlbWVudF90ZXh0KHNpemU9MTApLAogICAgICAgIHBhbmVsLmdyaWQ9ZWxlbWVudF9ibGFuaygpLAogICAgICAgIHBsb3QubWFyZ2luID0gdW5pdChyZXAoMC41LDQpLCAicHQiKSkrCiAgbGFicyh0aXRsZSA9ICJQZXJwZXR1YWwgc3RydWN0dXJlIFxuIChNQUMgPSAyLDMsNCkiKQoKcGx0X2NvbW1vbjEwMDwtZ2dwbG90KGNvbTEwMCxhZXMoUEMxLFBDMixjb2xvcj1hcy5mYWN0b3IoZGVtZSkpKSsKICBnZW9tX3BvaW50KGFscGhhPTAuNSwgc2l6ZT0wLjEpKwogIHRoZW1lX2J3KCkrCiAgc2NhbGVfY29sb3JfbWFudWFsKHZhbHVlcz1jb2xzKSsKICB0aGVtZShsZWdlbmQucG9zaXRpb249Im5vbmUiLAogICAgICAgIGF4aXMudGV4dD1lbGVtZW50X3RleHQoc2l6ZT0xMCksCiAgICAgICAgcGFuZWwuZ3JpZD1lbGVtZW50X2JsYW5rKCksCiAgICAgICAgcGxvdC5tYXJnaW4gPSB1bml0KHJlcCgwLjUsNCksICJwdCIpLAogICAgICAgIHBsb3QuYmFja2dyb3VuZD1lbGVtZW50X2JsYW5rKCkpKwogIGxhYnModGl0bGUgPSAiUmVjZW50IHN0cnVjdHVyZSBcbiAoTUFGID4gNSUpIikKCgpwbHRfcmFyZTEwMDwtZ2dwbG90KHJhcmUxMDAsYWVzKFBDMSxQQzIsY29sb3I9YXMuZmFjdG9yKGRlbWUpKSkrCiAgZ2VvbV9wb2ludChhbHBoYT0wLjUsIHNpemU9MC4xKSsKICB0aGVtZV9idygpKwogIHNjYWxlX2NvbG9yX21hbnVhbCh2YWx1ZXM9Y29scykrCiAgdGhlbWUobGVnZW5kLnBvc2l0aW9uPSJub25lIiwKICAgICAgICBheGlzLnRleHQ9ZWxlbWVudF90ZXh0KHNpemU9MTApLAogICAgICAgIHBhbmVsLmdyaWQ9ZWxlbWVudF9ibGFuaygpLAogICAgICAgIHBsb3QubWFyZ2luID0gdW5pdChyZXAoMC41LDQpLCAicHQiKSwKICAgICAgICBwbG90LmJhY2tncm91bmQgPSBlbGVtZW50X2JsYW5rKCkpKwogIGxhYnModGl0bGUgPSAiUmVjZW50IHN0cnVjdHVyZSBcbiAoTUFDID0gMiwzLDQpIikKCnBsdF9jb21iaW5lZCA8LSAocGx0X2NvbW1vbl85ICsgcGx0X2NvbW1vbjEwMCkgLyAocGx0X3JhcmVfOSArIHBsdF9yYXJlMTAwKQoKcGx0X2NvbWJpbmVkCgpgYGAKTm93IGxldCdzIHBsb3QgdGhlIGZpcnN0IHR3byBQQ3MgY2FsY3VsYXRlZCB1c2luZyBJQkQgc2hhcmluZy4gCgpgYGB7cn0KCmliZCA9IGZyZWFkKAogIEYoImRhdGEvZ3dhcy9ncmlkL2dlbm90eXBlcy90YXUxMDAvc3MyNTAvZ2VybWxpbmUvZ2VybS5jbV9pYmQuZWlnZW52ZWMiKSkKCnBvcDEwMCRpaWQgPSBwYXN0ZSgibXNwXyIsc2VxKDEsOTAwMCwxKSxzZXA9IiIpCgpjb2xuYW1lcyhpYmQpID0gYygiRklEIiwiSUlEIiwKICAgICAgICAgICAgICAgICAgcGFzdGUoIlBDIixzZXEoMSwyMCwxKSxzZXA9IiIpKQoKIyBpYmQyID0gaWJkWyxjKCJtc3AiLCJpaWQiKSA6PSB0c3Ryc3BsaXQoSUlELCBzcGxpdCA9ICJfIildCiMgaWJkMlssaWlkOj1wYXN0ZSgidHNrIixhcy5udW1lcmljKGlpZCktMSxzZXA9Il8iKV0KCmliZDIgPSBtZXJnZShwb3AxMDAsCiAgICAgICAgICAgaWJkLAogICAgICAgICAgIGJ5LnggPSAiaWlkIiwKICAgICAgICAgICBieS55ID0gIklJRCIsYWxsLng9VFJVRSxzb3J0PUZBTFNFKQoKcGx0X2liZDEgPSBnZ3Bsb3QoaWJkMixhZXMoUEMxLFBDMixjb2xvcj1hcy5mYWN0b3IoZGVtZSkpKSsKICBnZW9tX3BvaW50KHNob3cubGVnZW5kID0gRkFMU0UsCiAgICAgICAgICAgICBhbHBoYT0wLjUsIHNpemU9MC4xKSsKICBzY2FsZV9jb2xvcl9tYW51YWwodmFsdWVzPWNvbHMpKwogIHRoZW1lX2J3KCkrCiAgdGhlbWUobGVnZW5kLnBvc2l0aW9uPSJub25lIiwKICAgICAgICBheGlzLnRleHQ9ZWxlbWVudF90ZXh0KHNpemU9MTApLAogICAgICAgIHBsb3QudGl0bGUgPSBlbGVtZW50X3RleHQoc2l6ZSA9IDExKSwKICAgICAgICBheGlzLnRpdGxlID0gZWxlbWVudF90ZXh0KHNpemUgPSAxMSksCiAgICAgICAgcGFuZWwuZ3JpZD1lbGVtZW50X2JsYW5rKCkpKwogIGxhYnMoeD0iUEMyIix5PSJQQzMiLAogICAgICAgdGl0bGU9IlBDQSBvbiBJQkQtc2hhcmluZyIpCgpwbHRfaWJkMiA9IGdncGxvdChpYmQyLGFlcyhQQzMsUEM0LGNvbG9yPWFzLmZhY3RvcihkZW1lKSkpKwogIGdlb21fcG9pbnQoc2hvdy5sZWdlbmQgPSBGQUxTRSwKICAgICAgICAgICAgIGFscGhhPTAuNSwgc2l6ZT0wLjEpKwogIHNjYWxlX2NvbG9yX21hbnVhbCh2YWx1ZXM9Y29scykrCiAgdGhlbWVfYncoKSsKICB0aGVtZShsZWdlbmQucG9zaXRpb249Im5vbmUiLAogICAgICAgIGF4aXMudGV4dD1lbGVtZW50X3RleHQoc2l6ZT0xMCksCiAgICAgICAgcGxvdC50aXRsZSA9IGVsZW1lbnRfdGV4dChzaXplID0gMTEpLAogICAgICAgIGF4aXMudGl0bGUgPSBlbGVtZW50X3RleHQoc2l6ZSA9IDExKSwKICAgICAgICBwYW5lbC5ncmlkPWVsZW1lbnRfYmxhbmsoKSkrCiAgbGFicyh4PSJQQzIiLHk9IlBDMyIsCiAgICAgICB0aXRsZT0iUENBIG9uIElCRC1zaGFyaW5nIikKCnBsdF9pYmQxICsgcGx0X2liZDIKCmBgYAoKCgo=